[IA64] Final patch to make evtchn fully working for xen/ia64
author awilliam@xenbuild.aw <awilliam@xenbuild.aw>
Tue, 23 May 2006 15:17:57 +0000 (09:17 -0600)
committer awilliam@xenbuild.aw <awilliam@xenbuild.aw>
Tue, 23 May 2006 15:17:57 +0000 (09:17 -0600)
OK, this is the last patch of the set, which enables xen/ia64 to work
fully on the event channel mechanism. Now the user may observe
/proc/interrupts of dom0 as:

           CPU0
 34:         12        Phys-irq  ide0
 39:          0        Phys-irq  acpi
 45:        322        Phys-irq  serial
 48:     115006        Phys-irq  peth0
 49:      16269        Phys-irq  ioc0
 50:         31        Phys-irq  ioc1
 51:          2        Phys-irq  ehci_hcd:usb1
 52:          0        Phys-irq  uhci_hcd:usb2
 53:         55        Phys-irq  uhci_hcd:usb3
256:          0     Dynamic-irq  RESCHED0
257:          0     Dynamic-irq  IPI0
258:      44572     Dynamic-irq  timer0
259:       2316     Dynamic-irq  xenbus
260:       8304     Dynamic-irq  blkif-backend
261:      25947     Dynamic-irq  vif3.0
ERR:          0

With this patch, dom0's performance is not affected, while domU's
network output shows an increase of several percent. More importantly,
xen/ia64 can now take a first basic step towards driver domain support!

Signed-off-by: Kevin Tian <kevin.tian@intel.com>

linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c
linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile
linux-2.6-xen-sparse/arch/ia64/xen/drivers/xenia64_init.c
linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
linux-2.6-xen-sparse/include/asm-ia64/hw_irq.h
linux-2.6-xen-sparse/include/asm-ia64/irq.h
xen/arch/ia64/linux-xen/entry.S
xen/arch/ia64/xen/hyperprivop.S
xen/arch/ia64/xen/irq.c

index 2f955cb5ab3942f91a6ee4d796458a653fd533b6..ac49687f1bfaef1dcf7fbbddf4d172bc24a3143c 100644 (file)
@@ -670,6 +670,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
        iosapic_intr_info[vector].dmode    = delivery;
        iosapic_intr_info[vector].trigger  = trigger;
 
+#ifndef CONFIG_XEN
        if (trigger == IOSAPIC_EDGE)
                irq_type = &irq_type_iosapic_edge;
        else
@@ -682,6 +683,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery,
                               __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
                idesc->handler = irq_type;
        }
+#endif
        return 0;
 }
 
index 4f72108eb4bcb70304dc4533e7924226578f966b..78ff7a224395275f0fa8163146537603142657a0 100644 (file)
@@ -229,6 +229,150 @@ static struct irqaction ipi_irqaction = {
 };
 #endif
 
+#ifdef CONFIG_XEN
+#include <xen/evtchn.h>
+#include <xen/interface/callback.h>
+
+static char timer_name[NR_CPUS][15];   /* per-cpu irq name: action name + cpu number */
+static char ipi_name[NR_CPUS][15];
+static char resched_name[NR_CPUS][15];
+
+struct saved_irq {     /* one cached percpu irq registration */
+       unsigned int irq;
+       struct irqaction *action;
+};
+/* 16 should be a generous upper bound, since only a few percpu irqs
+ * are registered early.
+ */
+#define MAX_LATE_IRQ   16
+static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
+static unsigned short late_irq_cnt = 0;        /* entries cached before slab was ready */
+static unsigned short saved_irq_cnt = 0;       /* all entries cached by cpu 0 */
+static int xen_slab_ready = 0;                 /* set by xen_bind_early_percpu_irq() */
+
+/* Dummy stub. We could check for RESCHEDULE_VECTOR before __do_IRQ
+ * instead, but that ends up issuing several memory accesses on percpu
+ * data and thus adds unnecessary traffic to other paths.
+ */
+irqreturn_t handle_reschedule(int irq, void *dev_id, struct pt_regs *regs)
+{
+
+       return IRQ_HANDLED;
+}
+
+static struct irqaction resched_irqaction = {
+       .handler =      handle_reschedule,
+       .flags =        SA_INTERRUPT,
+       .name =         "RESCHED"       /* cpu number is appended at registration */
+};
+
+/*
+ * Xen version of percpu irq registration, binding to the xen specific
+ * evtchn sub-system. The evtchn binding interface depends on kmalloc
+ * (the port is freed at device/cpu down), so registrations made on the
+ * BSP before slab is ready are only cached and are bound later by
+ * xen_bind_early_percpu_irq(); later ones are bound immediately.
+ *
+ * @irq:    timer, reschedule-IPI or IPI vector; others only warn.
+ * @action: handler, flags, dev_id and base name (cpu number is appended).
+ * @save:   non-zero to cache the registration; honoured on cpu 0 only.
+ *
+ * FIXME: MCA is not supported so far, and thus the "nomca" boot param is
+ * required.
+ */
+void
+xen_register_percpu_irq (unsigned int irq, struct irqaction *action, int save)
+{
+       unsigned int cpu = smp_processor_id();
+       int ret = 0;
+
+       if (xen_slab_ready) {
+               switch (irq) {
+               case IA64_TIMER_VECTOR:
+                       snprintf(timer_name[cpu], sizeof(timer_name[cpu]), "%s%d", action->name, cpu);
+                       ret = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
+                               action->handler, action->flags,
+                               timer_name[cpu], action->dev_id);
+                       printk(KERN_INFO "register VIRQ_ITC (%s) to xen irq (%d)\n", timer_name[cpu], ret);
+                       break;
+               case IA64_IPI_RESCHEDULE:
+                       snprintf(resched_name[cpu], sizeof(resched_name[cpu]), "%s%d", action->name, cpu);
+                       ret = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu,
+                               action->handler, action->flags,
+                               resched_name[cpu], action->dev_id);
+                       printk(KERN_INFO "register RESCHEDULE_VECTOR (%s) to xen irq (%d)\n", resched_name[cpu], ret);
+                       break;
+               case IA64_IPI_VECTOR:
+                       snprintf(ipi_name[cpu], sizeof(ipi_name[cpu]), "%s%d", action->name, cpu);
+                       ret = bind_ipi_to_irqhandler(IPI_VECTOR, cpu,
+                               action->handler, action->flags,
+                               ipi_name[cpu], action->dev_id);
+                       printk(KERN_INFO "register IPI_VECTOR (%s) to xen irq (%d)\n", ipi_name[cpu], ret);
+                       break;
+               default:
+                       printk(KERN_WARNING "Percpu irq %d is unsupported by xen!\n", irq);
+                       break;
+               }
+               BUG_ON(ret < 0);
+       }
+
+       /* For BSP, cache registered percpu irqs so they can be re-walked
+        * when initializing APs */
+       if (!cpu && save) {
+               BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
+               saved_percpu_irqs[saved_irq_cnt].irq = irq;
+               saved_percpu_irqs[saved_irq_cnt].action = action;
+               saved_irq_cnt++;
+               if (!xen_slab_ready)
+                       late_irq_cnt++;
+       }
+}
+
+void
+xen_bind_early_percpu_irq (void)
+{
+       int i;
+
+       xen_slab_ready = 1;
+       /* Bind the registrations that were cached before slab was ready.
+        * There is no race on the cached array, since only the BSP goes
+        * through this step.
+        */
+       for (i = 0; i < late_irq_cnt; i++)
+               xen_register_percpu_irq(saved_percpu_irqs[i].irq,
+                       saved_percpu_irqs[i].action, 0);
+}
+
+/* FIXME: There's no obvious point to check whether slab is ready. So
+ * a hack is used here by utilizing a late time hook.
+ */
+extern void (*late_time_init)(void);
+extern char xen_event_callback;
+extern void xen_init_IRQ(void);
+
+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+void xen_smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+       unsigned int cpu = smp_processor_id();
+       unsigned int i = 0;
+       struct callback_register event = {
+               .type = CALLBACKTYPE_event,
+               .address = (unsigned long)&xen_event_callback,
+       };
+
+       if (!cpu)       /* nothing is done here for cpu 0 */
+               return;
+
+       /* This should be piggybacked on setting up the vcpu guest context */
+       BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+
+       for (i = 0; i < saved_irq_cnt; i++)     /* replay cached irqs on this cpu, save=0 */
+               xen_register_percpu_irq(saved_percpu_irqs[i].irq,
+                       saved_percpu_irqs[i].action, 0);
+#endif /* CONFIG_SMP */
+}
+#endif /* CONFIG_XEN */
+
 void
 register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
@@ -237,6 +381,10 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action)
 
        for (irq = 0; irq < NR_IRQS; ++irq)
                if (irq_to_vector(irq) == vec) {
+#ifdef CONFIG_XEN
+                       if (running_on_xen)
+                               return xen_register_percpu_irq(vec, action, 1);
+#endif
                        desc = irq_descp(irq);
                        desc->status |= IRQ_PER_CPU;
                        desc->handler = &irq_type_ia64_lsapic;
@@ -248,7 +396,21 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action)
 void __init
 init_IRQ (void)
 {
+#ifdef CONFIG_XEN
+       /* Maybe put into platform_irq_init later */
+       struct callback_register event = {
+               .type = CALLBACKTYPE_event,
+               .address = (unsigned long)&xen_event_callback,
+       };
+       xen_init_IRQ();
+       BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+       late_time_init = xen_bind_early_percpu_irq;
+#ifdef CONFIG_SMP
+       register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
+#endif
+#else /* CONFIG_XEN */
        register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
+#endif /* CONFIG_XEN */
 #ifdef CONFIG_SMP
        register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
 #endif
@@ -267,9 +429,32 @@ ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
 
 #ifdef CONFIG_XEN
         if (running_on_xen) {
-                extern void xen_send_ipi (int cpu, int vec);
-                xen_send_ipi (cpu, vector);
-                return;
+               int irq = -1;
+
+               /* TODO: we need to call vcpu_up here */
+               if (unlikely(vector == ap_wakeup_vector)) {
+                       extern void xen_send_ipi (int cpu, int vec);
+                       xen_send_ipi (cpu, vector);
+                       //vcpu_prepare_and_up(cpu);
+                       return;
+               }
+
+               switch(vector) {
+               case IA64_IPI_VECTOR:
+                       irq = per_cpu(ipi_to_irq, cpu)[IPI_VECTOR];
+                       break;
+               case IA64_IPI_RESCHEDULE:
+                       irq = per_cpu(ipi_to_irq, cpu)[RESCHEDULE_VECTOR];
+                       break;
+               default:
+                       printk(KERN_WARNING"Unsupported IPI type 0x%x\n", vector);
+                       irq = 0;
+                       break;
+               }               
+       
+               BUG_ON(irq < 0);
+               notify_remote_via_irq(irq);
+               return;
         }
 #endif /* CONFIG_XEN */
 
index 6f30d486082be848551db201f810237676043af0..030dfb9955ab88974ba0c3e41e57472037913010 100644 (file)
@@ -923,6 +923,10 @@ cpu_init (void)
        /* size of physical stacked register partition plus 8 bytes: */
        __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
        platform_cpu_init();
+#ifdef CONFIG_XEN
+       /* Need to be moved into platform_cpu_init later */
+       xen_smp_intr_init();
+#endif
        pm_idle = default_idle;
 }
 
index 053370e914d80ccf57a2f5c50746861f417c9855..f396542b4410b74755e31d2439b66a9234ddd76e 100644 (file)
@@ -6,7 +6,7 @@ obj-y   := gnttab.o features.o
 obj-$(CONFIG_PROC_FS) += xen_proc.o
 
 ifeq ($(ARCH),ia64)
-obj-y   += evtchn_ia64.o
+obj-y   += evtchn.o
 obj-y   += xenia64_init.o
 ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-$(CONFIG_NET)     += skbuff.o
index 13d89b7174d7e9241d619a5c965c0649fb65089c..4833a4f9284a98e374194ba46e7347142d048ddc 100644 (file)
@@ -33,7 +33,6 @@ int xen_init(void)
                s->arch.start_info_pfn, xen_start_info->nr_pages,
                xen_start_info->flags);
 
-       evtchn_init();
        initialized = 1;
        return 0;
 }
index 5d2529c78792e3a8e18294690e988748ac7fbb49..9ee454cfba3f116c98189f03750abc87eacad619 100644 (file)
@@ -2130,5 +2130,32 @@ non_ia32_syscall:
        mov rp=r15
        br.ret.sptk.many rp
 END(dispatch_to_ia32_handler)
-
 #endif /* CONFIG_IA32_SUPPORT */
+
+#ifdef CONFIG_XEN
+       .section .text,"ax"
+GLOBAL_ENTRY(xen_event_callback)
+       mov r31=pr              // prepare to save predicates
+       ;;
+       SAVE_MIN_WITH_COVER     // uses r31; defines r2 and r3
+       ;;
+       movl r3=XSI_PSR_IC
+       mov r14=1
+       ;;
+       st4 [r3]=r14            // store 1 into the word at XSI_PSR_IC
+       ;;
+       adds r3=8,r2            // set up second base pointer for SAVE_REST
+       srlz.i                  // ensure everybody knows psr.ic is back on
+       ;;
+       SAVE_REST
+       ;;
+       alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+       add out0=16,sp          // pass pointer to pt_regs as first arg
+       ;;
+       srlz.d                  // NOTE(review): comment used to cite cr.ivr, which is not read in this stub - confirm intent
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=evtchn_do_upcall
+END(xen_event_callback)
+#endif
index 0cf119b42f7d5c4d1d25ce4eb0e55b9a849d52cb..6f8bd3334d303d2ff7d66cd6a7da7137797d1a61 100644 (file)
 #include <asm/ptrace.h>
 #include <asm/smp.h>
 
+#ifndef CONFIG_XEN
 typedef u8 ia64_vector;
+#else
+typedef u16 ia64_vector;
+#endif
 
 /*
  * 0 special
@@ -86,11 +90,15 @@ extern void free_irq_vector (int vector);
 extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
 extern void register_percpu_irq (ia64_vector vec, struct irqaction *action);
 
+#ifndef CONFIG_XEN
 static inline void
 hw_resend_irq (struct hw_interrupt_type *h, unsigned int vector)
 {
        platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0);
 }
+#else
+extern void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i);
+#endif /* CONFIG_XEN */
 
 /*
  * Default implementations for the irq-descriptor API:
index dbe86c0bbce5f896f25e4934228ac2c36228227a..a86f83b9f35d3bb04256d7e987892f3fbb165f03 100644 (file)
  * 02/29/00     D.Mosberger    moved most things into hw_irq.h
  */
 
+#ifndef CONFIG_XEN
 #define NR_IRQS                256
 #define NR_IRQ_VECTORS NR_IRQS
+#else
+/*
+ * The flat IRQ space is divided into two regions:
+ *  1. A one-to-one mapping of real physical IRQs. This space is only used
+ *     if we have physical device-access privilege. This region is at the 
+ *     start of the IRQ space so that existing device drivers do not need
+ *     to be modified to translate physical IRQ numbers into our IRQ space.
+ *  2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ *     are bound using the provided bind/unbind functions.
+ */
+
+#define PIRQ_BASE              0
+#define NR_PIRQS               256
+
+#define DYNIRQ_BASE            (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS             256
+
+#define NR_IRQS                        (NR_PIRQS + NR_DYNIRQS)
+#define NR_IRQ_VECTORS         NR_IRQS
+
+#define pirq_to_irq(_x)                ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x)                ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x)      ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x)      ((_x) - DYNIRQ_BASE)
+
+#define RESCHEDULE_VECTOR      0
+#define IPI_VECTOR             1
+#define NR_IPIS                        2
+#endif /* CONFIG_XEN */
 
 /*
  * IRQ line status macro IRQ_PER_CPU is used
index d1b438b9bc8dab62803dfa2cf80aa240c51ec257..c4be56e813750fb7a42a0894a1f5411354022fe0 100644 (file)
@@ -906,17 +906,12 @@ GLOBAL_ENTRY(ia64_leave_kernel)
     ;;
        alloc loc0=ar.pfs,0,1,1,0
        adds out0=16,r12
-    adds r7 = PT(EML_UNAT)+16,r12
+       adds r7 = PT(EML_UNAT)+16,r12
        ;;
-    ld8 r7 = [r7]
+       ld8 r7 = [r7]
        ;;
-#if 0
-leave_kernel_self:
-    cmp.ne p8,p0 = r0, r7
-(p8) br.sptk.few leave_kernel_self
-       ;; 
-#endif
-(pUStk)        br.call.sptk.many b0=deliver_pending_interrupt
+(pUStk)        br.call.sptk.many b0=reflect_event
+//(pUStk)      br.call.sptk.many b0=deliver_pending_interrupt
     ;;
        mov ar.pfs=loc0
        mov ar.unat=r7  /* load eml_unat  */
index 8717f8553100116157baa9933b4110fcffaddbb8..87c4ba51be98784636161691bd1b5a4f25395df7 100644 (file)
@@ -106,6 +106,11 @@ GLOBAL_ENTRY(fast_hyperprivop)
        or r23=r23,r24; or r21=r21,r22;;
        or r20=r23,r21;;
 1:     // when we get to here r20=~=interrupts pending
+       // Check pending event indication
+(p7)   adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18;;
+(p7)   ld8 r20=[r20];;
+(p7)   adds r20=-1,r20;;
+(p7)   ld1 r20=[r20];;
 
        // HYPERPRIVOP_RFI?
        cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
index 5859964a6389b99ecb44a91ef697d84b9acd1545..a6064f4bc40f5324ba5be3e13ef23fa6efa66585 100644 (file)
  */
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
        [0 ... NR_IRQS-1] = {
-               .status = IRQ_DISABLED | IRQ_GUEST,
+               .status = IRQ_DISABLED,
                .handler = &no_irq_type,
                .lock = SPIN_LOCK_UNLOCKED
        }
 };
 
+void __do_IRQ_guest(int irq);
+
 /*
  * Special irq handlers.
  */
@@ -167,9 +169,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
        spin_lock(&desc->lock);
 
        if (desc->status & IRQ_GUEST) {
-               /* __do_IRQ_guest(irq); */
-               vcpu_pend_interrupt(dom0->vcpu[0],irq);
-               vcpu_wake(dom0->vcpu[0]);
+               __do_IRQ_guest(irq);
                spin_unlock(&desc->lock);
                return 1;
        }